program vp_dg

 use mod_utils, only: &
   t_realtime, my_second

 use mod_messages, only: &
   mod_messages_constructor, &
   mod_messages_destructor,  &
   error,   &
   warning, &
   info

 use mod_kinds, only: &
   mod_kinds_constructor, &
   mod_kinds_destructor,  &
   wp

 !$ use mod_omp_utils, only: &
 !$   mod_omp_utils_constructor, &
 !$   mod_omp_utils_destructor,  &
 !$   detailed_timing_omp

 use mod_fu_manager, only: &
   mod_fu_manager_constructor, &
   mod_fu_manager_destructor,  &
   new_file_unit

 use mod_octave_io, only: &
   mod_octave_io_constructor, &
   mod_octave_io_destructor,  &
   real_format,    &
   write_octave,   &
   read_octave,    &
   read_octave_al, &
   locate_var
 
 use mod_tps_phs_grid, only: &
   mod_tps_phs_grid_constructor, &
   mod_tps_phs_grid_destructor,  &
   t_real_lists, &
   t_tps_phs_grid, new_tps_phs_grid, clear, &
   write_octave, read_octave_al
 
 use mod_linal, only: &
   mod_linal_constructor, &
   mod_linal_destructor

 use mod_perms, only: &
   mod_perms_constructor, &
   mod_perms_destructor

 use mod_numquad, only: &
   mod_numquad_constructor, &
   mod_numquad_destructor,  &
   get1d_gauss_nodes, get1d_gausslobatto_nodes
 
 use mod_mpi_utils, only: &
   mod_mpi_utils_constructor, &
   mod_mpi_utils_destructor,  &
   mpi_comm_world, &
   mpi_init, mpi_thread_single, mpi_thread_multiple, &
   mpi_finalize

 use mod_sparse, only: &
   mod_sparse_constructor, &
   mod_sparse_destructor

 use mod_octave_io_sparse, only: &
   mod_octave_io_sparse_constructor, &
   mod_octave_io_sparse_destructor

 use mod_tps_base, only: &
   mod_tps_base_constructor, &
   mod_tps_base_destructor,  &
   t_tps_base, new_tps_base, clear, &
   write_octave
 
 use mod_state_vars, only: &
   mod_state_vars_constructor, &
   mod_state_vars_destructor,  &
   c_stv

 use mod_output_control, only: &
   mod_output_control_constructor, &
   mod_output_control_destructor,  &
   elapsed_format, &
   base_name

 use mod_f_state, only: &
   mod_f_state_constructor, &
   mod_f_state_destructor,  &
   t_f_state, t_e_field, &
   new_f_state, new_e_field, clear, &
   write_octave
 
 use mod_testcases, only: &
   mod_testcases_constructor, &
   mod_testcases_destructor,  &
   test_name, test_description, &
   coeff_init, coeff_e, coeff_p

 use mod_time_integrators, only: &
   mod_time_integrators_constructor, &
   mod_time_integrators_destructor,  &
   c_ode, c_ods, c_tint, t_ti_init_data, t_ti_step_diag, &
   t_ee, t_hm, t_rk4, t_ssprk54, &
   t_thetam, t_bdf1, t_bdf2, t_bdf3, t_bdf2ex, &
   t_erb2kry, t_erb2lej, &
   t_pcexpo
 
 use mod_vp_ode, only: &
   mod_vp_ode_constructor, &
   mod_vp_ode_destructor,  &
   t_vp_ode, compute_diags, compute_electric_field, p0, &
   new_vp_ode, clear
   
 use mod_linsolver, only: &
   mod_linsolver_constructor, &
   mod_linsolver_destructor


 implicit none

 ! Define some general parameters
 integer, parameter :: max_char_len = 10000
 character(len=*), parameter :: this_prog_name = 'vp_dg'

 ! IO variables
 character(len=*), parameter :: input_file_name_def = 'vp-dg.in'
 character(len=max_char_len) :: input_file_name
 character(len=*), parameter :: out_file_nml_suff = '-nml.octxt'
 character(len=max_char_len+len(out_file_nml_suff)):: out_file_nml_name
 character(len=max_char_len) :: basename
 logical :: write_grid
 character(len=*), parameter :: out_file_grid_suff = '-grid.octxt'
 character(len=max_char_len+len(out_file_grid_suff))::out_file_grid_name
 character(len=*), parameter :: out_file_base_suff = '-base.octxt'
 character(len=max_char_len+len(out_file_base_suff))::out_file_base_name
 integer :: fu, ierr

 ! Grid
 type(t_real_lists), allocatable :: xx(:), vv(:)
 type(t_tps_phs_grid), target :: grid
 character(len=max_char_len) :: grid_file

 ! FE basis
 logical :: alpha_reconstruction
 integer :: k(2) ! polynomial degree for x and v
 character(len=max_char_len) :: x_vspace
 type(t_tps_base), target :: base(2)

 ! Some grid/base related variables useful for visualization
 integer, allocatable :: xv_idx(:,:)
 real(wp), allocatable :: x_points(:,:,:), v_points(:,:,:), &
   xv_points(:,:,:,:), xx_points(:,:,:,:)
 
 ! Test case
 character(len=max_char_len) :: testname
 character(len=max_char_len) :: integrator

 ! Unknowns
 type(t_f_state) :: uuu0, uuun
 type(t_e_field) :: e_field

 ! Initial condition
 integer :: shape_in(2), shape_out(2), ie, ix, iv, i
 real(wp) :: rescale
 real(wp), allocatable :: xe(:,:,:), ve(:,:,:)

 ! Time integration
 integer :: nstep, n, n_out
 real(wp) :: dt, tt_sta, tt_end, t_nm1, t_n
 logical :: l_output1, l_output2
 integer :: nout1, nout2
 type(t_vp_ode)  :: vp_ode
 type(t_ti_init_data) :: ti_init_data
 type(t_ti_step_diag) :: ti_step_diag
 class(c_tint), allocatable :: tint

 real(wp), parameter :: pi = 3.1415926535897932384626433832795028_wp
 ! Parameters for the definition of the initial condition
 character(len=max_char_len) :: message(1)

 ! Timing
 real(t_realtime) :: t00, t0, t1, t0step

 ! MPI variables
 logical, parameter :: master_proc = .true.

 ! Input namelist
 namelist /input/ &
   ! test case
   testname, &
   ! output base name
   basename, &
   ! grid
   write_grid, &
   grid_file,  &
   ! base
   k, alpha_reconstruction, x_vspace, &
   ! time stepping
   tt_sta, tt_end, dt, &
   integrator, &
   nout1, nout2  ! output frequency


 call mod_messages_constructor()

 call mod_kinds_constructor()

 !$ if(detailed_timing_omp) call mod_omp_utils_constructor()
 
 call mod_fu_manager_constructor()

 call mod_octave_io_constructor()

 call mod_linal_constructor()

 call mod_perms_constructor()

 call mod_numquad_constructor()

 call mod_state_vars_constructor()

 !call mpi_init(ierr)
 call mpi_init_thread(mpi_thread_multiple,i,ierr)
 call mod_mpi_utils_constructor()

 !----------------------------------------------------------------------
 ! Read input file
 if(command_argument_count().gt.0) then
   call get_command_argument(1,value=input_file_name)
 else
   input_file_name = input_file_name_def
 endif
 call new_file_unit(fu,ierr)
 open(fu,file=trim(input_file_name), &
      status='old',action='read',form='formatted',iostat=ierr)
 read(fu,input)
 close(fu,iostat=ierr)
 !! If there are more processes, each of them needs its own basename
 !! and grid
 !if(mpi_nd.gt.1) then
 !  write(in_basename,'(a,a,i3.3)') trim(in_basename),'-P',mpi_id
 !  write(grid_file  ,'(a,a,i3.3)') trim(grid_file)  ,'.' ,mpi_id
 !endif
 ! echo the input namelist
 out_file_nml_name = trim(basename)//out_file_nml_suff
 open(fu,file=trim(out_file_nml_name), &
      status='replace',action='write',form='formatted',iostat=ierr)
 write(fu,input)
 close(fu,iostat=ierr)
 !----------------------------------------------------------------------

 call mod_output_control_constructor(basename)

 !----------------------------------------------------------------------
 ! Define the grid
 t0 = my_second()

 call mod_tps_phs_grid_constructor()

 call new_file_unit(fu,ierr)
 open(fu,file=trim(grid_file), &
      status='old',action='read',form='formatted',iostat=ierr)
 call read_octave_al(xx,'xx',fu) ! implies allocate(xx)
 call read_octave_al(vv,'vv',fu) ! implies allocate(vv)
 close(fu,iostat=ierr)
 call new_tps_phs_grid(grid,xx,vv)

 ! write the octave output
 if(write_grid) then
   out_file_grid_name = trim(base_name)//out_file_grid_suff
   call new_file_unit(fu,ierr)
   open(fu,file=trim(out_file_grid_name), &
        status='replace',action='write',form='formatted',iostat=ierr)
   call write_octave(grid,'grid',fu)
   close(fu,iostat=ierr)
 endif

 t1 = my_second()
 write(message(1),elapsed_format) &
   'Completed grid construction: elapsed time ',t1-t0,'s.'
 if(master_proc) &
   call info(this_prog_name,'',message(1))
 !----------------------------------------------------------------------

 !---------------------------------------------------------------------
 ! Define the basis
 t0 = my_second()

 call mod_tps_base_constructor()

 x_vspace_case: select case(trim(x_vspace))
  case("dg")
   call new_tps_base(base(1),k(1),grid%gx%d) ! k(1) scalars and vectors
  case("rt")
   call new_tps_base(base(1),k(1),grid%gx%d,k(1)+1)
  case default
   call error(this_prog_name,"", &
     'Unknown vector space "'//trim(x_vspace)//'".')
 end select x_vspace_case
 call new_tps_base(base(2),k(2),grid%gv%d)

 ! Write some additional output which is useful for the postprocessing
 out_file_base_name = trim(base_name)//out_file_base_suff
 call new_file_unit(fu,ierr)
 open(fu,file=trim(out_file_base_name), &
      status='replace',action='write',form='formatted',iostat=ierr)
 call write_octave(alpha_reconstruction,'alpha_reconstruction',fu)
 call write_octave(base(1),'x_base',fu)
 call write_octave(base(2),'v_base',fu)
 ! We don't close here this file since some additional output will be
 ! added later regarding the degrees of freedom and the Poisson
 ! problem.

 t1 = my_second()
 write(message(1),elapsed_format) &
   'Completed FE basis construction: elapsed time ',t1-t0,'s.'
 if(master_proc) &
   call info(this_prog_name,'',message(1))
 !----------------------------------------------------------------------

 !----------------------------------------------------------------------
 ! Linear solver
 call mod_sparse_constructor()
 call mod_octave_io_sparse_constructor()
 call mod_linsolver_constructor()
 !----------------------------------------------------------------------

 !----------------------------------------------------------------------
 ! Allocations and initial condition
 t0 = my_second()

 call mod_time_integrators_constructor()

 call mod_f_state_constructor()
 call mod_vp_ode_constructor(grid,base,fu,alpha_reconstruction)

 call new_f_state(uuu0   ,grid,base)
 call new_f_state(uuun   ,grid,base)
 call new_e_field(e_field,grid,base)
 call new_vp_ode( vp_ode ,grid,base)

 call mod_testcases_constructor(testname,(/grid%gx%d,grid%gv%d/))

 allocate( xe( grid%gx%d , base(1)%pkd , base(2)%pkd ) , &
           ve( grid%gv%d , base(1)%pkd , base(2)%pkd ) )
 shape_in  = (/ grid%gv%d , base(1)%pkd*base(2)%pkd /) ! to reshape
 shape_out = (/             base(1)%pkd,base(2)%pkd /)
 if(write_grid) allocate( xv_points( grid%gx%d+grid%gv%d , &
                     base(1)%pkd , base(2)%pkd , grid%ne ) )
 do ie=1,grid%ne
   ! For each element, we need to generate the nodal points: these
   ! points are generated by tensor product; first we let the x
   ! coordinate vary and keep v fixed, then we let v vary.

   x_coords_do: do ix=1,base(1)%pkd
     do i=1,grid%gx%d
       xe(i,ix,:) = & ! the same x corresponds to many v points
         grid%e(ie)%e1(1)%p%bw(   i)*base(1)%xgl(base(1)%mldx(ix,i)) &
       + grid%e(ie)%e1(1)%p%box(1,i)
     enddo
   enddo x_coords_do

   v_coords_do: do iv=1,base(2)%pkd
     do i=1,grid%gv%d
       ve(i,:,iv) = & ! the same v corresponds to many x points
         grid%e(ie)%e1(2)%p%bw(   i)*base(2)%xgl(base(2)%mldx(iv,i)) &
       + grid%e(ie)%e1(2)%p%box(1,i)
     enddo
   enddo v_coords_do

   uuu0%f(:,:,ie) = reshape(                                          &
    coeff_init(reshape(xe,shape_in),reshape(ve,shape_in)) , shape_out )

   if(write_grid) then ! store the coordinates for output
     xv_points(1:grid%gx%d ,:,:,ie) = xe
     xv_points(grid%gx%d+1:,:,:,ie) = ve
   endif
 enddo
 deallocate( xe , ve )

 ! Complete writing the base output file
 if(write_grid) then
   ! Use these fields in octave as
   !   [X,Y]=meshgrid(x_points(:),v_points(:));
   !   Z=uuu.f(:)(xv_idx);
   !   surf(X,Y,Z')
   !
   ! And to make plots in 2D:
   !
   ! plot3(x_points(1,:,:)(:),x_points(2,:,:)(:),uuu.r(:),'o')
   ! quiver(x_points(1,:,:)(:),x_points(2,:,:)(:), ...
   !        uuu.et(1,1,:,:)(:),uuu.et(2,1,:,:)(:), 0)
   !
   ! To plot the distribution function in v
   ! figure
   ! hold on
   ! iex = 1; ix = 1
   ! pk = v_base.pk
   ! for ie=1:grid.ne
   !   if(grid.e(ie).e1(1)==iex)
   !     f = reshape(uuu.f(ix,:,ie),[pk pk])
   !     x = reshape(v_points(1,:,grid.e(ie).e1(2)),[pk pk])
   !     y = reshape(v_points(2,:,grid.e(ie).e1(2)),[pk pk])
   !     surf(x,y,f)
   !   end
   ! end
   call fill_xv_points() ! only needed for output
   call write_octave((/( size(xx(i)%x)-1,i=1,size(xx) )/),'r','nex',fu)
   call write_octave((/( size(vv(i)%x)-1,i=1,size(vv) )/),'r','nev',fu)
   call write_octave(xv_idx   ,'xv_idx'   ,fu)
   call write_octave(x_points ,'x_points' ,fu)
   call write_octave(v_points ,'v_points' ,fu)
   call write_octave(xv_points,'xv_points',fu)
   call write_octave(xx_points,'xx_points',fu)
   deallocate( xv_idx , x_points , v_points , xv_points , xx_points )
 endif
 deallocate(xx,vv)
 call write_octave(test_name       ,'test_name'       ,fu)
 call write_octave(test_description,'test_description',fu)

 call write_octave(uuu0%f,"f0",fu)
 close(fu,iostat=ierr)

 call check_initial_condition(uuu0,rescale,i)
 if(i.gt.0) then
   write(message(1),'(a,e10.3)') &
     "Bad scaling of the initial condition: scale = ", rescale
   call error('vp-dg','',message(1))
 else
   write(message(1),'(a,sp,e10.3)') &
     "Scaling of the initial condition: scale = 1", rescale-1.0_wp
   call info('vp-dg','',message(1))
 endif

 t1 = my_second()
 write(message(1),elapsed_format) &
   'Completed start-up phase: elapsed time ',t1-t0,'s.'
 if(master_proc) &
   call info(this_prog_name,'',message(1))
 !----------------------------------------------------------------------

 !----------------------------------------------------------------------
 ! Time integration
 !
 !   0        1                 n             nstep
 !   |--------|--------|--------|--------|------|
 ! tt_sta            t_nm1     t_n            tt_end
 !                     | step_n |
 !

 ! Set time integration method
 time_int_case: select case(trim(integrator))
  case('rk4')
   allocate(t_rk4::tint)
  case('ssprk54')
   allocate(t_ssprk54::tint)
  case('erb2kry')
   allocate(t_erb2kry::tint)
   ti_init_data%dim = 150       ! size of the Krylov space
   ti_init_data%tol = 1.0e-3_wp ! tolerance
  case('erb2lej')
   allocate(t_erb2lej::tint)
   ti_init_data%dim = 150       ! maximum number of Leja points
   ti_init_data%tol = 1.0e-2_wp ! tolerance
   allocate(ti_init_data%ir1(2)); ti_init_data%ir1 = (/-76.0_wp,0.0_wp/)
  case('pcexpo')
   allocate(t_pcexpo::tint)
   ti_init_data%dim = 50
   ti_init_data%tol = 1.0e-6_wp
   ti_init_data%mpi_id = 0
   ti_init_data%mpi_comm = mpi_comm_world
   allocate(ti_init_data%ii1(1)); ti_init_data%ii1 = (/1/) ! Kry/Lej
   allocate(ti_init_data%ir1(2)); ti_init_data%ir1 = (/-20.0_wp,0.1_wp/)
  case default
   call error(this_prog_name, '',&
    'Unknown integration method "'//trim(integrator)//'".')
 end select time_int_case
 
 nstep = ceiling(tt_end/dt)
 n_out = 0
 call write_out2(grid,base,tt_sta,uuu0,e_field,.true.,ti_step_diag)
 ! the next call also updates the electric field
 call write_out2(grid,base,tt_sta,uuu0,e_field,.false.,ti_step_diag)
 call write_out1(0,tt_sta,n_out,uuu0,e_field,ti_step_diag)
 !call print_error(uuu0,e_field) ! enable if known exact solution

 call tint%init( vp_ode,dt,0.0_wp,uuu0,e_field,         &
        init_data=ti_init_data , step_diag=ti_step_diag )
 t0 = my_second()
 time_loop: do n=1,nstep

   t_nm1 = tt_sta + real(n-1,wp)*dt
   t_n   = t_nm1 + dt

   call tint%step(uuun,vp_ode,t_nm1,uuu0,e_field,ti_step_diag)
   uuu0%f = uuun%f

   ! Select how often the full distribution is written
   l_output1 = (mod(n,nout1).eq.0).or.(n.eq.1).or.(n.eq.nstep)
   if(l_output1) then
     write(*,*) 'Step ',n,' of ',nstep
     n_out = n_out + 1
     call compute_electric_field(e_field,uuu0)
     call write_out1(n,t_n,n_out,uuu0,e_field,ti_step_diag)
     ! Update the initial guess for the linear solver
     p0 = e_field%p
   endif

   ! Select how often the integrated diagnostics are written
   l_output2 = (mod(n,nout2).eq.0).or.(n.eq.1).or.(n.eq.nstep)
   if(l_output2) &
     call write_out2(grid,base,t_n,uuu0,e_field,.false.,ti_step_diag)

 enddo time_loop
 t1 = my_second()
 write(message(1),elapsed_format) &
   'Completed time loop: elapsed time ',t1-t0,'s.'
 if(master_proc) &
   call info(this_prog_name,'',message(1))

 call tint%clean(vp_ode,ti_step_diag)
 deallocate(tint)

 call mod_testcases_destructor()

 call clear(vp_ode)
 call mod_vp_ode_destructor()

 call clear(e_field)
 call clear(uuu0)
 call clear(uuun)
 call mod_f_state_destructor()

 call mod_time_integrators_destructor()

 call mod_linsolver_destructor()
 call mod_octave_io_sparse_destructor()
 call mod_sparse_destructor()

 call clear(base(1)); call clear(base(2))
 call mod_tps_base_destructor()

 call clear(grid)
 call mod_tps_phs_grid_destructor()

 call mod_output_control_destructor()

 call mod_mpi_utils_destructor()
 call mpi_finalize(ierr)

 call mod_state_vars_destructor()

 call mod_numquad_destructor()

 call mod_perms_destructor()

 call mod_linal_destructor()

 call mod_octave_io_destructor()

 call mod_fu_manager_destructor()

 !$ if(detailed_timing_omp) call mod_omp_utils_destructor()

 call mod_kinds_destructor()

 call mod_messages_destructor()

contains

 subroutine fill_xv_points()

  integer :: ie, ix, iv, id, jd, ixm(grid%d), ivm(grid%d)
  
  allocate(xv_idx(base(1)%pkd*grid%gx%ne,base(2)%pkd*grid%gv%ne))
  do ie=1,grid%ne
    do iv=1,base(2)%pkd
      do ix=1,base(1)%pkd
        xv_idx( (grid%e(ie)%e1(1)%p%o-1)*base(1)%pkd + ix ,      &
                (grid%e(ie)%e1(2)%p%o-1)*base(2)%pkd + iv ) =    &
          ix + (iv-1)*base(1)%pkd + (ie-1)*base(1)%pkd*base(2)%pkd
      enddo
    enddo
  enddo

  allocate(x_points(grid%d,base(1)%pkd,grid%gx%ne))
  do ie=1,grid%gx%ne
    do ix=1,size(x_points,2)
      ixm = base(1)%mldx(ix,:) ! space multiindex

      do id=1,grid%d
        x_points(id,ix,ie) =                        &
          grid%gx%e(ie)%bw(id)*base(1)%xgl(ixm(id)) &
                + grid%gx%e(ie)%box(1,id)
      enddo
    enddo
  enddo

  allocate(v_points(grid%d,base(2)%pkd,grid%gv%ne))
  do ie=1,grid%gv%ne
    do iv=1,size(v_points,2)
      ivm = base(2)%mldx(iv,:) ! space multiindex

      do id=1,grid%d
        v_points(id,iv,ie) =                        &
          grid%gv%e(ie)%bw(id)*base(2)%xgl(ivm(id)) &
                + grid%gv%e(ie)%box(1,id)
      enddo
    enddo
  enddo

  allocate(xx_points(grid%d,base(1)%pkdx,grid%gx%ne,grid%d))
  do ie=1,grid%gx%ne
    do jd=1,grid%d ! vector component
      do ix=1,size(xx_points,2)
        !ixm = base(1)%mldx(ix,:) ! space multiindex
        ixm = base(1)%mldxx(ix,jd)%mldxp ! space multiindex

        do id=1,grid%d
          xx_points(id,ix,ie,jd) =                     &
            grid%gx%e(ie)%bw(id)*base(1)%xglx(ixm(id)) &
                  + grid%gx%e(ie)%box(1,id)
        enddo
      enddo
    enddo
  enddo

 end subroutine fill_xv_points

 subroutine write_out1(n,t,n_out,uuu,ef,sd)
  integer, intent(in) :: n, n_out
  real(wp), intent(in) :: t
  type(t_f_state), intent(in) :: uuu
  type(t_e_field), intent(in) :: ef
  type(t_ti_step_diag), intent(in) :: sd

  integer :: fu, ierr
  character(len=*), parameter :: time_stamp_format = '(i4.4)'
  character(len=4) :: time_stamp
  character(len=*), parameter :: suff1 = '-res-'
  character(len=*), parameter :: suff2 = '.octxt'
  character(len= len_trim(base_name) + len(suff1) + 4 + len(suff2)) :: &
     out_file_res_name

   call new_file_unit(fu,ierr)
   write(time_stamp,time_stamp_format) n_out
   out_file_res_name = trim(base_name)//suff1//time_stamp//suff2
   open(fu,file=out_file_res_name, &
        status='replace',action='write',form='formatted',iostat=ierr)
    call write_octave(n  ,'n'  ,fu)
    call write_octave(t  ,'t'  ,fu)
    call write_octave(uuu,'uuu',fu)
    call write_octave(ef ,'ef' ,fu)
    if(allocated(sd%d1)) then
      call write_octave(sd%max_iter,'ti_max_iter',fu)
      call write_octave(sd%iterations,'ti_iter',fu)
      if(allocated(sd%d1)) &
        call write_octave(sd%d1,'r',trim(sd%name_d1),fu)
      if(allocated(sd%d2)) &
        call write_octave(sd%d2,    trim(sd%name_d2),fu)
      if(allocated(sd%d3)) &
        call write_octave(sd%d3,    trim(sd%name_d3),fu)
    endif
   close(fu,iostat=ierr)

 end subroutine write_out1

 !> Write "high-frequency" output
 !!
 !! The output is collected in one array and written as a row in an
 !! ASCII file. There is only one output file, with one row per time
 !! level.
 subroutine write_out2(grid,base,t,uuu,ef,init,sd)
  type(t_tps_phs_grid), intent(in) :: grid
  type(t_tps_base),     intent(in) :: base(2)
  real(wp),             intent(in) :: t
  type(t_f_state),      intent(in) :: uuu
  type(t_e_field),      intent(inout) :: ef
  logical,              intent(in) :: init
  type(t_ti_step_diag), intent(in) :: sd

  character(len=*), parameter :: suff = '-fast-output.octxt'
  real(wp) :: momentum(grid%d), ekin, f_integr, f_l1norm, &
    f_l2norm, f_min, f_max, e_l2norm(grid%d), p_jmnorm
  real(wp) :: data(1+2*grid%d+8)
  real(wp), allocatable :: all_data(:) ! includes optional diags
  real(wp), allocatable :: ek(:) ! spectral components of E
  character(len=5) d_size
  character(len=len(base_name) + len(suff)), save :: &
     out_file_res_name

   if(init) then
     out_file_res_name = trim(base_name)//suff
     call new_file_unit(fu,ierr)
     open(fu,file=out_file_res_name, status='replace', &
       action='write',form='formatted')
     ! Nothing to write: we just make sure the file is empty
     close(fu)
     return
   endif

   call compute_diags(uuu , ef , grid,base ,  momentum,ekin,      &
     f_integr,f_l1norm,f_l2norm,f_min,f_max,e_l2norm,p_jmnorm, ek )

   data = (/ &
       t,        & ! (1    ) time
       momentum, & ! (2   :) momentum
       ekin,     & ! (2+ d ) kinetic energy
       e_l2norm, & ! (3+ d:) 1/2*||E||^2
       p_jmnorm, & ! (3+2d ) c11/2*[[Phi]]^2
       ekin + sum(e_l2norm) + p_jmnorm, & ! (4+2d) total energy
       f_integr, & ! (5+2d ) integral of f; number of particles
       f_l1norm, & ! (6+2d ) L1(f)
       f_l2norm, & ! (7+2d ) L2(f)
       f_min,    & ! (8+2d ) min(f)
       f_max     & ! (9+2d ) max(f)
     /)

   ! Here we use reallocation on assignment: things are much easier...
   all_data = data
   if(allocated(ek)) & ! Fourier modes
     all_data = (/ all_data , ek /)
   if(allocated(sd%d1)) & ! number of iterations
     all_data = (/ all_data , real(sd%iterations,wp) /)
  
   call new_file_unit(fu,ierr)
   open(fu,file=out_file_res_name, status='old',          &
     action='readwrite',form='formatted',position='append')
    write(d_size,'(i5)') size(all_data)
    write(fu,'('//d_size//'('//real_format//')'//')') all_data
   close(fu)
   deallocate(all_data)

 end subroutine write_out2

 !> Preliminary error computation (compared to the octave
 !! postprocessing, this subroutine can exploit the quadruple
 !! precision).
 subroutine print_error(uuu,ef)
  type(t_f_state), intent(in) :: uuu
  type(t_e_field), intent(in), target :: ef

  integer :: ie, i, j, ix
  real(wp) :: epl2, eel2, eetl2(2), xg(grid%gx%d,base(1)%pkd), &
    ! xgx: considers distinct points for each vector component
    xgx(grid%gx%d,base(1)%pkdx,grid%gx%d), &
    ! xgtx: same as xgx but for tilde_e: the points are the same
    xgtx(grid%gx%d,base(1)%pkd,grid%gx%d), &
    pex(base(1)%pkd), eex(grid%gx%d,base(1)%pkdx), &
    eetx(grid%gx%d,base(1)%pkd)
  real(wp), pointer :: uuu_p(:,:)

   uuu_p(1:uuu%base(1)%pkd,1:uuu%grid%gx%ne)    &
     => ef%p%pl(1:uuu%base(1)%pkd*uuu%grid%gx%ne)

   epl2  = 0.0_wp
   eel2  = 0.0_wp
   eetl2 = 0.0_wp
   do ie=1,grid%gx%ne
     x_coords_do: do ix=1,base(1)%pkd
       do i=1,grid%gx%d
         xg(i,ix) = &
             grid%gx%e(ie)%bw(   i)*base(1)%xgl(base(1)%mldx(ix,i)) &
           + grid%gx%e(ie)%box(1,i)
       xgtx(i,ix,:) = xg(i,ix) ! same points for all the components
       enddo
     enddo x_coords_do
     x_e_coords_do: do ix=1,base(1)%pkdx
       do i=1,grid%gx%d ! coordinate component
         do j=1,grid%gx%d ! vector component
           ! The extended component must be taken from xglx, while the
           ! remaining ones must be taken from xgl
           if(j.eq.i) then ! extended component -> use xglx
        xgx(i,ix,j) = &
             grid%gx%e(ie)%bw(   i)*base(1)%xglx( &
                   base(1)%mldxx(ix,j)%mldxp(i) ) &
           + grid%gx%e(ie)%box(1,i)
           else
        xgx(i,ix,j) = &
             grid%gx%e(ie)%bw(   i)*base(1)%xgl(  &
                   base(1)%mldxx(ix,j)%mldxp(i) ) &
           + grid%gx%e(ie)%box(1,i)
           endif
         enddo
       enddo
     enddo x_e_coords_do

     ! Exact solution (notice that E is required on two point sets)
     pex  = coeff_p(0.0_wp,xg  )
     eex  = coeff_e(0.0_wp,xgx )
     eetx = coeff_e(0.0_wp,xgtx)

     epl2 = epl2 + dot_product( grid%gx%e(ie)%vol*base(1)%wgld , &
                                (pex-uuu_p(:,ie))**2 )
     do i=1,grid%gx%d
       eel2 = eel2+dot_product( grid%gx%e(ie)%vol*base(1)%wgldx(:,i), &
                                (eex(i,:)-ef%e(i,:,ie))**2 )
       ! Notice: each component of et has two "flavours" corresponding
       ! to positive and negative velocities. In principle, all the
       ! possible combinations should be considered, making 2^d
       ! possible electric fields. However, for simplicity, we just
       ! compute here two errors: one error is computed assuming v>0
       ! for all the components, and the other one assuming v<0 for
       ! all the components. In practice, these errors are very
       ! similar.
       do j=1,2
   eetl2(j) = eetl2(j)+dot_product( grid%gx%e(ie)%vol*base(1)%wgld , &
                               (eetx(i,:)-ef%et(i,j,:,ie))**2 )
       enddo
     enddo
   enddo
   epl2  = sqrt(epl2)
   eel2  = sqrt(eel2)
   eetl2 = sqrt(eetl2)

   write(*,*) "||p-ph||_L2 = ",epl2
   write(*,*) "||e-eh||_L2 = ",eel2
   write(*,*) "||e-et||_L2 = ",eetl2

   stop "Completed error computation"

 end subroutine print_error

 ! This subroutine checks the consistency of the initial condition
 ! concerning the normalization: the integral of f-1 over the entire
 ! domain must be zero.
 !
 ! If the difference is smaller than a fixes tolerance, the initial
 ! condition is rescaled, otherwise the code produces an error.
 pure subroutine check_initial_condition(uuu,scale,ierr)
  integer, intent(out) :: ierr
  real(wp), intent(out) :: scale
  type(t_f_state), intent(inout) :: uuu

  real(wp), parameter :: eps1 = 1.0e-5_wp
  real(wp), parameter :: eps2 = 10.0_wp*epsilon(1.0_wp)
  integer :: ie
  real(wp) :: integral

   integral = 0.0_wp ! phase space integral of f
   do ie=1,grid%ne
     integral = integral +                                     &
       ! element volume
       uuu%grid%e(ie)%e1(1)%p%vol*uuu%grid%e(ie)%e1(2)%p%vol * &
       ! element quadrature: x_wgl' * f * v_vgl
       dot_product(                                            &
         matmul( uuu%base(1)%wgld , uuu%f(:,:,ie) ) ,          &
                 uuu%base(2)%wgld                              &
       )
   enddo

   scale = sum(uuu%grid%gx%e%vol) / integral

   if(scale.lt.0.0_wp) then               ! something *really* wrong
     ierr = 2
   elseif(abs(scale-1.0_wp).gt.eps1) then ! bad scaling
     ierr = 1
   elseif(abs(scale-1.0_wp).gt.eps2) then ! rescale the solution
     uuu%f = scale * uuu%f
     ierr = 0
   else                                   ! nothing to do
     ierr = 0
   endif
     
 end subroutine check_initial_condition

end program vp_dg
